{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Playground: RAG with Anthropic Claude 3 Models via Bedrock & Elasticsearch Python Client\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/playground/bedrock-anthropic-elasticsearch-client.ipynb)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "zQlYpYkI46Ff",
    "outputId": "83677846-8a6a-4b49-fde0-16d473778814"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "poetry 1.6.1 requires build<0.11.0,>=0.10.0, but you have build 1.0.3 which is incompatible.\n",
      "poetry 1.6.1 requires jsonschema<4.18.0,>=4.10.0, but you have jsonschema 4.19.1 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.1\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel\n",
    "%pip install -qU elasticsearch \"anthropic[bedrock]\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GCZR7-zK810e"
   },
   "source": [
    "## Example Integration code\n",
    "To adapt this example for your use-case:\n",
    "- Update your connection details in the `es_client`\n",
    "- Replace the `es_query` with the query suggested in Playground\n",
    "- Replace the `index_source_fields` with the fields you want to use for context, per index\n",
    "- Update the prompt to reflect the Playground prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "DofNZ2w25nIr"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This document appears to be a set of company policies and guidelines related to:\n",
      "\n",
      "1. A full-time work-from-home policy [Position 1-31]\n",
      "2. A vacation policy [Position 32-56]\n",
      "3. A career leveling matrix for software engineers [Position 57-end]\n",
      "\n",
      "The work-from-home policy outlines the purpose, scope, eligibility, equipment/resources, workspace requirements, communication expectations, work hours, performance standards, time tracking, confidentiality, and health/well-being considerations for employees working remotely full-time.\n",
      "\n",
      "The vacation policy covers vacation accrual rates, scheduling procedures, vacation pay calculations, handling unplanned absences, and payouts for unused vacation time upon termination.\n",
      "\n",
      "The career leveling matrix defines the roles, responsibilities, and required skills/experience for three levels of software engineering positions: Junior Software Engineer, Senior Software Engineer, and Principal Software Engineer. It serves as a framework for employee development, performance evaluations, and career progression within the software engineering function.\n"
     ]
    }
   ],
   "source": [
    "from elasticsearch import Elasticsearch\n",
    "from anthropic import AnthropicBedrock\n",
    "\n",
    "from getpass import getpass\n",
    "\n",
    "# Prompt for the Elasticsearch API key so it is never stored in the notebook. See:\n",
    "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
    "\n",
    "# Update the Elasticsearch client with your own Elasticsearch instance\n",
    "es_client = Elasticsearch(\n",
    "    \"http://localhost:9200\",\n",
    "    api_key=ELASTIC_API_KEY,\n",
    ")\n",
    "\n",
    "# No keys are passed here, so credentials come from the default AWS credential providers, such as\n",
    "# ~/.aws/credentials or the \"AWS_SECRET_ACCESS_KEY\" and \"AWS_ACCESS_KEY_ID\" environment variables.\n",
    "# To authenticate explicitly instead, pass `aws_access_key` and `aws_secret_key` -- load them with\n",
    "# getpass() or os.environ rather than pasting the values into the notebook.\n",
    "completion_client = AnthropicBedrock(\n",
    "    aws_region=\"us-west-2\",\n",
    ")\n",
    "\n",
    "# Update the source fields based on your context field options.\n",
    "# Maps each index name to the fields used as context; create_prompt reads the first field listed.\n",
    "index_source_fields = {\"bm25-index\": [\"text\"]}\n",
    "\n",
    "\n",
    "def get_elasticsearch_results(query):\n",
    "    \"\"\"Run `query` against the `bm25-index` index and return the `hits.hits` list of the response.\n",
    "\n",
    "    Update the retriever below to match the one provided in Playground.\n",
    "    \"\"\"\n",
    "    multi_match = {\"query\": query, \"fields\": [\"text\"]}\n",
    "    search_body = {\n",
    "        \"retriever\": {\"standard\": {\"query\": {\"multi_match\": multi_match}}},\n",
    "        \"size\": 3,\n",
    "    }\n",
    "\n",
    "    response = es_client.search(index=\"bm25-index\", body=search_body)\n",
    "    return response[\"hits\"][\"hits\"]\n",
    "\n",
    "\n",
    "def create_prompt(question, results):\n",
    "    \"\"\"Build the LLM prompt from the user's question and the Elasticsearch hits.\n",
    "\n",
    "    Args:\n",
    "        question: The question to answer.\n",
    "        results: Search hits, as returned by get_elasticsearch_results.\n",
    "\n",
    "    Returns:\n",
    "        The prompt string: instructions, the context extracted from the hits, and the question.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If a hit comes from an index with no fields listed in index_source_fields.\n",
    "    \"\"\"\n",
    "    context = \"\"\n",
    "    for hit in results:\n",
    "        index_name = hit[\"_index\"]\n",
    "        source_fields = index_source_fields.get(index_name)\n",
    "        if not source_fields:\n",
    "            raise KeyError(\n",
    "                f\"No source fields configured for index '{index_name}'; update index_source_fields\"\n",
    "            )\n",
    "        source_field = source_fields[0]\n",
    "        inner_hit_path = f\"{index_name}.{source_field}\"\n",
    "\n",
    "        ## For semantic_text matches, we need to extract the text from the inner_hits\n",
    "        if \"inner_hits\" in hit and inner_hit_path in hit[\"inner_hits\"]:\n",
    "            hit_context = \"\\n --- \\n\".join(\n",
    "                inner_hit[\"_source\"][\"text\"]\n",
    "                for inner_hit in hit[\"inner_hits\"][inner_hit_path][\"hits\"][\"hits\"]\n",
    "            )\n",
    "        else:\n",
    "            hit_context = hit[\"_source\"][source_field]\n",
    "\n",
    "        # Terminate every hit with a newline so text from consecutive hits never runs together\n",
    "        context += f\"{hit_context}\\n\"\n",
    "\n",
    "    # Update the prompt based on your own requirements\n",
    "    prompt = f\"\"\"\n",
    "  Instructions:\n",
    "  \n",
    "  - You are an assistant for question-answering tasks.\n",
    "  - Answer questions truthfully and factually using only the information presented.\n",
    "  - If you don't know the answer, just say that you don't know, don't make up an answer!\n",
    "  - You must always cite the document where the answer was extracted using inline academic citation style [], using the position.\n",
    "  - Use markdown format for code examples.\n",
    "  - You are correct, factual, precise, and reliable.\n",
    "  \n",
    "\n",
    "  Context:\n",
    "  {context}\n",
    "\n",
    "  Question: {question}\n",
    "  Answer:\n",
    "  \"\"\"\n",
    "\n",
    "    return prompt\n",
    "\n",
    "\n",
    "def generate_completion(\n",
    "    user_prompt, model=\"anthropic.claude-3-sonnet-20240229-v1:0\", max_tokens=256\n",
    "):\n",
    "    \"\"\"Send the prompt to the Bedrock-hosted model and return the response content blocks.\n",
    "\n",
    "    Args:\n",
    "        user_prompt: Text sent as the single user message.\n",
    "        model: Bedrock model ID to call.\n",
    "        max_tokens: Maximum number of tokens to generate; raise it if answers are cut off.\n",
    "\n",
    "    Returns:\n",
    "        The `content` of the response (the caller reads `.text` from its items).\n",
    "    \"\"\"\n",
    "    response = completion_client.messages.create(\n",
    "        model=model,\n",
    "        messages=[\n",
    "            {\"role\": \"user\", \"content\": user_prompt},\n",
    "        ],\n",
    "        max_tokens=max_tokens,\n",
    "    )\n",
    "\n",
    "    return response.content\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    # End-to-end run: retrieve hits, build the prompt, ask the model, print the answer\n",
    "    question = \"what is this?\"\n",
    "    elasticsearch_results = get_elasticsearch_results(question)\n",
    "    context_prompt = create_prompt(question, elasticsearch_results)\n",
    "    completion = generate_completion(context_prompt)\n",
    "    # Print every text block rather than only completion[0], so an empty or multi-block\n",
    "    # response does not raise IndexError or lose part of the answer\n",
    "    print(\"\".join(block.text for block in completion if block.type == \"text\"))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "include_colab_link": true,
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
